src/AllCells/[RC17]Clustering.Rmd · MonolayerCultures

---
title: '[RC17]Clustering'
author: "Nina-Lydia Kazakou"
date: "04/03/2022"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Set-up

### Load libraries

```{r message=FALSE, warning=FALSE}
library(SingleCellExperiment)
library(Seurat)
library(tidyverse)
library(Matrix)
library(scales)
library(cowplot)
library(RCurl)
library(scDblFinder)
library(Matrix)
library(ggplot2)
library(edgeR)
library(dplyr)
library(ggsci)  
library(here)
```

### Colour Palette

```{r load_palette}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3<-pal_lancet("lanonc", alpha = 0.7)(9)
mypal4<-pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)
mycoloursP<- c(mypal, mypal2, mypal3, mypal4)
```

### Load object

```{r}
RC17_norm <- readRDS(here("data", "Processed", "AllCells", "RC17_norm_srt.rds"))
```

# Clustering

### Some more QC Plots

```{r fig.height=6, fig.width=10}
Idents (RC17_norm) <- "Sample"

VlnPlot(RC17_norm, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),ncol = 3, pt.size = 0.1, cols = mycoloursP[3:40])
plot1 <- FeatureScatter(RC17_norm, feature1 = "nCount_RNA", feature2 = "percent.mt", cols = mycoloursP[3:40])
plot2 <- FeatureScatter(RC17_norm, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", cols = mycoloursP[3:40])
plot1 + plot2
```

## Identify Variable Features:

```{r}
RC17_norm <- FindVariableFeatures(RC17_norm, selection.method = "vst", nfeatures = 2000)
```

## Identify the 10 most highly variable genes:

```{r message=FALSE, warning=FALSE}
top10 <- head(VariableFeatures(RC17_norm), 10)
plotA <- VariableFeaturePlot(RC17_norm, cols = mycoloursP[4:5])
plotB <- LabelPoints(plot = plotA, points = top10, repel = TRUE)
plotB
```

## Scale all genes (if there is a batch effect I could add vars to regress here):

```{r}
all.genes <- rownames(RC17_norm)
RC17_norm <- ScaleData(RC17_norm, features = all.genes)
```

## Linear dimensional reduction:

```{r}
RC17_norm <- RunPCA(RC17_norm, features = VariableFeatures(RC17_norm), npcs = 100)
```

```{r}
VizDimLoadings(RC17_norm, dims = 1:2, reduction = "pca")
```

```{r}
DimPlot(RC17_norm, reduction = "pca", cols= mycoloursP[22:50])

DimHeatmap(RC17_norm, dims = 1, cells = 500, balanced = TRUE)
```

```{r fig.height=16, fig.width=10}
DimHeatmap(RC17_norm, dims = 1:20, cells = 500, balanced = TRUE)
```

```{r fig.height=6, fig.width=10}
PCAPlot(RC17_norm, split.by = "orig.ident", cols= mycoloursP[22:50])
```

## Dimentionality Reduction:

```{r}
ElbowPlot(RC17_norm)
```

```{r}
RC17_norm <- FindNeighbors(RC17_norm, dims = 1:18, verbose = FALSE)
RC17_norm <- FindClusters(RC17_norm, resolution = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0), verbose = FALSE)
head(RC17_norm@meta.data, 2)
```

Load original object:
```{r}
RC17_norm <- readRDS(here("data", "Processed", "Original_Objects", "ScaterQC_ScranNorm_ScaleDataSeurat.rds"))
```

I have previously examined different resolutions for clustering these data and decided that *RNA_snn_res.0.8* works best, based both on approximation models and general cell-type markers found in literature.

```{r message=FALSE}
Idents(object = RC17_norm) <- "RNA_snn_res.0.8" #assign cluster identity
RC17_norm <- RunUMAP(RC17_norm, dims = 1:18, reduction = "pca")
```

```{r}
DimPlot(RC17_norm, reduction = "umap", label = TRUE, pt.size = 0.8, cols = mycoloursP)
DimPlot(RC17_norm, reduction = "pca", label = F, cols = mycoloursP, pt.size = 1)
```

Now, I will check the contribution of each sample to each cluster, to see if clusters are made up solely by cells derived from ont timepoint:

```{r}
IDsPerCluster <- as.data.frame(tapply(RC17_norm@meta.data$Sample, 
                                      RC17_norm@meta.data$RNA_snn_res.0.8, 
                                      function(x) length(unique(x)) ))
names(IDsPerCluster) <- "NumberOfIDs"
IDsPerCluster$RNA_snn_res.0.8 <- rownames(IDsPerCluster)
IDsPerCluster$Cluster <- rownames(IDsPerCluster)
head(IDsPerCluster,2)

#write.csv(IDsPerCluster, here("outs", "Processed", "AllCells", "ClusterContribution_RNA_snn_res.0.8.csv"))
```

It seems like all clusters are made up by cells from all three timepoints. Later on, I will also check how many cells from each timepoint make up each cluster for my chosen resolution.

## Cell-cycle scoring

```{r warning=FALSE}
RC17_norm <- CellCycleScoring(object = RC17_norm, g2m.features = cc.genes$g2m.genes,s.features = cc.genes$s.genes)
head((RC17_norm@meta.data))
```

```{r fig.height=6, fig.width=12}
VlnPlot(RC17_norm, features = c("S.Score","G2M.Score"), pt.size = 0.01, cols = mycoloursP)
```

Some more plots:

```{r Doublets}
DimPlot(RC17_norm, reduction = "umap", 
        label = F, 
        group.by = "scDblFinder.class",
        cols = c(mycoloursP[25] , mycoloursP[26]),
        pt.size = 0.6)
```

```{r Cell-Cycle}
DimPlot(RC17_norm, reduction = "umap", 
        label = F, 
        group.by = "Phase",
        cols = mycoloursP[27:50],
        pt.size = 0.6)
```

```{r fig.height=8, fig.width=15}
DimPlot(RC17_norm, reduction = "umap", 
        label = T, 
        group.by = "RNA_snn_res.0.8",
        split.by = "Phase",
        cols = mycoloursP[27:50],
        pt.size = 0.6)
```

## Cluster Annotation
```{r}
Idents(RC17_norm) <- "RNA_snn_res.0.8"

DefaultAssay(RC17_norm) <- "RNA"
```

```{r Oligodendroglia}
FeaturePlot(RC17_norm, 
            reduction = "umap", 
            features = c("OLIG2", "OLIG1", "OLIG3", "SOX10"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE, cols = c("lightgrey", "red"), ncol = 2)
```

```{r OPCs, fig.height=12, fig.width=8}
FeaturePlot(RC17_norm, 
            features = c("PDGFRA", 
                         "CSPG4", 
                         "GPR17", 
                         "PTPRZ1",
                         "PCDH15", 
                         "PTGDS",
                         "BCAN",
                         "CABLES1", 
                         "GFRA1", 
                         "LINC01965"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE, cols = c("lightgrey", "red"), ncol = 2)
```

```{r COPs, fig.height=16, fig.width=8}
FeaturePlot(RC17_norm, 
            features = c("ETV1", 
                         "CHST9", 
                         "MYT1", 
                         "TENM2", 
                         "CAMK2A", 
                         "KCNQ5", 
                         "SEMA5B",
                         "SYT1",
                         "GPR17",
                         "BMPER",
                         "EPHB1",
                         "ARHGAP24",
                         "DOCK8"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE, cols = c("lightgrey", "red"), ncol = 2)
```

```{r Oligodendrocytes, fig.height=14, fig.width=8}
FeaturePlot(RC17_norm, 
            reduction = "umap", 
            features = c("MBP", "PLP1", "CNP", "MAG", "MOG", "BCAS1", "ZNF488", "GALC", "MOBP"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE, cols = c("lightgrey", "red"), ncol = 2)
```

```{r Mature_Oligodendrocytes, fig.height=18, fig.width=8}
FeaturePlot(RC17_norm, 
            reduction = "umap", 
            features = c("SNAP25", 
                         "CNTN1", 
                         "FRY", 
                         "PLXDC2", 
                         "DYSF", 
                         "PTPRM", 
                         "FOS",
                         "VIM",
                         "JUNB",
                         "AFF3",
                         "KANK4",
                         "FSTL5",
                         "SGCZ",
                         "MDGA2"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE, cols = c("lightgrey", "red"), ncol = 2)
```

```{r TF, fig.height=8, fig.width=8}
FeaturePlot(RC17_norm, 
            reduction = "umap", 
            features = c("SOX4", "SOX11", "MLLT11", "ZNF711", "EZH2", "DACH2"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE, cols = c("lightgrey", "red"), ncol = 2)
```

```{r Radial_Glial, Neuroprogenitors, fig.height=8, fig.width=8}
FeaturePlot(RC17_norm, 
            reduction = "umap", 
            features = c( "PAX6", "HES1", "HES5", "SOX2", "CDH2", "VIM"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE, cols = c("lightgrey", "darkorchid"), ncol = 2)
```

```{r Astrocytes, fig.height=20, fig.width=8}
FeaturePlot(RC17_norm, 
            reduction = "umap", 
            features = c("S100B", "SLC1A3", "EFNB3", "TFF3", "SPARCL1", "SPON1", "TGFB2", "GJA1",
                         "AQP4", 
                         "GLUL", 
                         "SOX9", 
                         "NDRG2", 
                         "GFAP", 
                         "ALDH1A1",  
                         "APOE", 
                         "FGFR3"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE, cols = c("lightgrey", "darkgreen"), ncol = 2)
```

```{r Astrocytes_2, fig.height=28, fig.width=8}
FeaturePlot(RC17_norm, 
            reduction = "umap", 
            features = ,c( "TENM2",  
                           "CCDC85A", 
                           "SLC38A1",
                           "GALNT15",
                           "BNC2",
                           "CFAP299",
                           "SPAG17",
                           "DTHD1",
                           "ADGB",
                           "UAP1",
                           "SPOCD1",
                           "CLCF1",
                           "SORCS1",
                           "SAMHD1",
                           "NRGN",
                           "CAMK2A",
                           "THY1",
                           "ENC1",
                           "SYT1",
                           "CALM3",
                           "ST18",
                           "CTNNA3",
                           "APLNR"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE, cols = c("lightgrey", "darkgreen"), ncol = 2)
```

```{r Pericytes, fig.height=12, fig.width=8}
FeaturePlot(RC17_norm, 
            reduction = "umap", 
            features = c("PDGFRB", "COL1A1", "COL1A2", "ACTA2", "SPARC", "BGN", "S100A10"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE, cols = c("lightgrey", "darkorange"), ncol = 2)
```

```{r Neurons, fig.height=6, fig.width=8}
FeaturePlot(RC17_norm, features = c("SNAP25", "STMN2", "RBFOX3", "GABRB2"),order = TRUE, 
            min.cutoff = 'q10', 
            label = TRUE, cols = c("lightgrey", "darkblue"), ncol = 2)
```

```{r Inhibitory_Neurons, fig.height=24, fig.width=8}
FeaturePlot(RC17_norm, features = c("LAMA3", "VIP", "TAC3", "RYR3", "TSHZ2", 
                                    "RELN", "IL1RAPL2", "CXCL14", "EYA4",
                                    "FBXL7", "KIT", "MEIS2", "PBX3", "PLCL1", 
                                    "MYO5B", "TRHDE", "PLCH1"),
            order = TRUE, 
            min.cutoff = 'q10', 
            label = TRUE, cols = c("lightgrey", "darkblue"), ncol = 2)
```

```{r Ecitatory_Neurons}
FeaturePlot(RC17_norm, features = c("SATB2", "SLC12A6", "FEZF2","RORB"),order = TRUE, 
            min.cutoff = 'q10', 
            label = TRUE, cols = c("lightgrey", "darkblue"), ncol = 2)
```

```{r Glutaminergic_Neurons}
FeaturePlot(RC17_norm, 
            reduction = "umap", 
            features = c("GLS", "GRIN2B", "SLC17A6"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE, cols = c("lightgrey", "darkblue"), ncol = 2)
```

```{r Serotonergic_Neurons, fig.height=3, fig.width=8}
FeaturePlot(RC17_norm, 
            reduction = "umap", 
            features = c("SLC6A4", "TPH1"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE, cols = c("lightgrey", "darkblue"), ncol = 2)
```

```{r Neuronal_Receptors, fig.height=3, fig.width=8}
FeaturePlot(RC17_norm, 
            reduction = "umap", 
            features = c("GABBR1", "SLC32A1"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE, cols = c("lightgrey", "darkblue"), ncol = 2)
```

```{r General_Neuronal_Markers, fig.height=18, fig.width=8}
FeaturePlot(RC17_norm, 
            reduction = "umap", 
            features = c("DCX", "DLG4", "FOS", "FOXG1", "GPHN", "NCAM1", "NEFL", "NEUROD1", "RBFOX3", "RBFOX1", "MAP2", "SYP", "SYNPR", "TUBB3"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE, cols = c("lightgrey", "darkblue"), ncol = 2)
```

```{r Microglia}
FeaturePlot(RC17_norm, 
            reduction = "umap", 
            features = c("CX3CR1", "CD68", "CD40"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE, cols = c("lightgrey", "purple"), ncol = 2)
```

```{r Interesting_post-mortem_Markers}
FeaturePlot(RC17_norm, 
            reduction = "umap", 
            features = c("RBFOX1",
                         "SPARC",
                         "NELL1"), 
            order = TRUE,
            min.cutoff = 'q10', 
            label = TRUE, cols = c("lightgrey", "darkorchid3"), ncol = 2)
```

## Assign ClusterID

```{r message=FALSE, warning=FALSE}
RC17_norm$ClusterID <- RC17_norm$RNA_snn_res.0.8
current.ids <- c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23)
new.ids <- c("Radial_Glia_1", "Radial_Glia/Pre-OPCs_1", "Astrocytes_1", "Astrocytes_2", "Oligodendroglia_1", "Unknown", "Radial_Glia_2", "Outer-Radial_Glia_1", "Outer-Radial_Glia_2", "Radial_Glia/Pre-OPCs_2", "Neurons_1", "Neurons_2", "Oligodendroglia_2", "Radial_Glia_3", "Oligodendroglia_3", "Neurons_3", "Neurons_3", "Unknown", "Oligodendroglia_4", "Oligodendroglia_5", "Oligodendroglia_6", "Unknown", "Pericytes", "Outer-Radial_Glia_3")
RC17_norm@meta.data$ClusterID <- plyr::mapvalues(x = RC17_norm@meta.data$ClusterID, from = current.ids, to = new.ids)
head(RC17_norm@meta.data, 2)
```


```{r fig.height=8, fig.width=8}
DimPlot(RC17_norm, reduction = "umap", pt.size = 0.8, group.by = "ClusterID", cols = mycoloursP[6:40], label = TRUE) & NoLegend()
```


Some more plots:

```{r Doublets_2, fig.height=6, fig.width=10}
DimPlot(RC17_norm, reduction = "umap", 
        label = TRUE, 
        group.by = "ClusterID", split.by = "scDblFinder.class", 
        cols = mycoloursP,
        pt.size = 0.6) & NoLegend()
```

```{r Cell-Cycle_2, fig.height=7, fig.width=16}
DimPlot(RC17_norm, reduction = "umap", 
        label = TRUE, 
        group.by = "ClusterID", split.by = "Phase", 
        cols = mycoloursP,
        pt.size = 0.6) & NoLegend()
```

```{r Timepoint, fig.height=7, fig.width=16}
DimPlot(RC17_norm, reduction = "umap", 
        label = TRUE, 
        group.by = "ClusterID", split.by = "Sample", 
        cols = mycoloursP,
        pt.size = 0.6) & NoLegend()
```


```{r}
DimPlot(RC17_norm, reduction = "umap", 
        label = FALSE, 
        group.by = "Sample", 
        cols = mycoloursP[5:40],
        pt.size = 0.6) 
```



### Check how many cells from each timepoint make up each cluster for my chosen resolution.
```{r}
n_cells <- FetchData(RC17_norm, 
                     vars = c("ClusterID", "orig.ident")) %>%
  dplyr::count(ClusterID, orig.ident) %>%
  tidyr::spread(ClusterID, n)
head(n_cells, 5)

#write.csv(n_cells, here("outs", "Processed", "AllCells", "no.ofCellsperCluster.csv"))
```

#### Clean-up metadata:
```{r}
RC17_norm@meta.data$percent.mito <- NULL
RC17_norm@meta.data$LibSize_high <- NULL
RC17_norm@meta.data$LibSize_low <- NULL
RC17_norm@meta.data$low_nFeatures <- NULL
RC17_norm@meta.data$high_nFeatures <- NULL
RC17_norm@meta.data$high_subsets_MitoPerc <- NULL
RC17_norm@meta.data$ScaterQC <- NULL
RC17_norm@meta.data$label <- NULL
RC17_norm@meta.data$scDblFinder.sample <- NULL
RC17_norm@meta.data$scDblFinder.cluster <- NULL
RC17_norm@meta.data$scDblFinder.nearestClass <- NULL
RC17_norm@meta.data$scDblFinder.difficulty <- NULL
RC17_norm@meta.data$scDblFinder.cxds_score <- NULL
RC17_norm@meta.data$scDblFinder.mostLikelyOrigin <- NULL
RC17_norm@meta.data$scDblFinder.weighted <- NULL
RC17_norm@meta.data$scDblFinder.ratio <- NULL
RC17_norm@meta.data$scDblFinder.originAmbiguous <- NULL
RC17_norm@meta.data$RNA_snn_res.0.5 <- NULL
RC17_norm@meta.data$RNA_snn_res.0.6 <- NULL
RC17_norm@meta.data$RNA_snn_res.0.7 <- NULL
RC17_norm@meta.data$RNA_snn_res.1 <- NULL
RC17_norm@meta.data$seurat_clusters <- NULL
RC17_norm@meta.data$Celltype_res.0.8 <- NULL
RC17_norm@meta.data$Cluster_id <- NULL
RC17_norm@meta.data$dataset <- NULL

RC17_norm@meta.data$outlier <- RC17_norm@meta.data$discard
RC17_norm@meta.data$discard <- NULL

RC17_norm@meta.data$Dataset <- "RC17_Monolayer_AllCells"
```


```{r}
#saveRDS(RC17_norm, here("data", "Processed", "AllCells", "RC17_AllCells_srt.rds"))
```


```{r}
sessionInfo()
```